To run an optimization I need for one district (Neukölln?) - residential buildings ~> children per building ~> children age 5/6 per block - capacity per school
Load all base data
sampled_buildings = read_csv('output/sampled_buildings.csv')
Parsed with column specification:
cols(
OI = col_character(),
long = col_double(),
lat = col_double(),
BLK = col_character()
)
bez = readOGR('download/RBS_OD_BEZ_2015_12.geojson', layer = 'OGRGeoJSON')
OGR data source with driver: GeoJSON
Source: "download/RBS_OD_BEZ_2015_12.geojson", layer: "OGRGeoJSON"
with 13 features
It has 2 fields
blk = readOGR('download/RBS_OD_BLK_2015_12.geojson', layer = 'OGRGeoJSON')
OGR data source with driver: GeoJSON
Source: "download/RBS_OD_BLK_2015_12.geojson", layer: "OGRGeoJSON"
with 15720 features
It has 4 fields
lor = readOGR('download/RBS_OD_LOR_2015_12.geojson', layer = 'OGRGeoJSON')
OGR data source with driver: GeoJSON
Source: "download/RBS_OD_LOR_2015_12.geojson", layer: "OGRGeoJSON"
with 447 features
It has 8 fields
re_schulstand = readOGR('download/re_schulstand.geojson', layer = 'OGRGeoJSON')
OGR data source with driver: GeoJSON
Source: "download/re_schulstand.geojson", layer: "OGRGeoJSON"
with 709 features
It has 20 fields
Load the huge route matrix
routen_matrix = read_csv('output/routen_matrix.csv') %>% mutate(distance=distance*60)
Parsed with column specification:
cols(
src = col_character(),
dst = col_character(),
duration = col_double(),
distance = col_double()
)
Load school capacities and population data
TODO - neue Daten von Torres
kapas = read_csv('download/anmeldezahlen.csv') %>% filter(grepl('G', Schulnummer)) %>% filter(!is.na(`Plätze`))
Parsed with column specification:
cols(
Bezirk = col_character(),
Schulnummer = col_character(),
Schulname = col_character(),
Plätze = col_integer(),
Anmeldungen = col_character()
)
einwohner_lor = read_delim('download/EWR201512E_Matrix.csv', delim=';')
Parsed with column specification:
cols(
.default = col_character(),
ZEIT = col_integer(),
STADTRAUM = col_integer(),
E_E = col_number(),
E_EM = col_number(),
E_EW = col_number(),
E_E50_55 = col_number(),
E_E25U55 = col_number(),
E_E55U65 = col_number(),
E_E65U80 = col_number()
)
See spec(...) for full column specifications.
re_schulstand_df = re_schulstand %>% as.data.frame() %>% rename(lon=coords.x1, lat=coords.x2)
re_schulstand_df %>% filter(grepl('G', spatial_name)) %>% mutate(BEZIRK=enc2utf8(as.character(BEZIRK))) %>%
group_by(BEZIRK) %>% summarise(`Anzahl Schulen` = n()) %>%
rename(Bezirk=BEZIRK) %>% left_join(kapas %>% group_by(Bezirk) %>% summarise(`Mit Kapazität` = n()))
Joining, by = "Bezirk"
Wir wählen einen Bezirk, wo wir von allen Schulen Kapas haben.
re_schulstand %>% as.data.frame() %>% filter(grepl('G', spatial_name)) %>% mutate(BEZIRK=enc2utf8(as.character(BEZIRK))) %>% group_by(BEZIRK) %>% summarise(`Anzahl Schulen` = n()) %>%
rename(Bezirk=BEZIRK) %>% left_join(kapas %>% group_by(Bezirk) %>% summarise(`Mit Kapazität` = n())) %>% filter(`Anzahl Schulen` == `Mit Kapazität`)
Joining, by = "Bezirk"
bezirk = 'Tempelhof-Schöneberg'
schulen_mit_kapa = kapas %>% filter(Bezirk == bezirk) %>% .$Schulnummer
schulen_mit_kapa
[1] "07G01" "07G02" "07G03" "07G05" "07G06" "07G07" "07G10" "07G12"
[9] "07G13" "07G14" "07G15" "07G16" "07G17" "07G18" "07G19" "07G20"
[17] "07G21" "07G22" "07G23" "07G24" "07G25" "07G26" "07G27" "07G28"
[25] "07G29" "07G30" "07G31" "07G32" "07G34" "07G35" "07G36" "07G37"
07G01
07G02
07G03
07G05
07G06
07G07
07G10
07G12
07G13
07G14
07G15
07G16
07G17
07G18
07G19
07G20
07G21
07G22
07G23
07G24
07G25
07G26
07G27
07G28
07G29
07G30
07G31
07G32
07G34
07G35
07G36
07G37
grundschulen = re_schulstand %>% as.data.frame() %>% filter(grepl('G', spatial_name)) %>% filter(BEZIRK == bezirk) %>% .$spatial_name
'In Anmeldeliste, fehlt in Schulstand'
[1] "In Anmeldeliste, fehlt in Schulstand"
In Anmeldeliste, fehlt in Schulstand
setdiff(schulen_mit_kapa, grundschulen)
character(0)
'In re_schulstand, fehlt in Anmeldeliste'
[1] "In re_schulstand, fehlt in Anmeldeliste"
In re_schulstand, fehlt in Anmeldeliste
setdiff(grundschulen, schulen_mit_kapa)
character(0)
OMG nur bei Mitte mappt das perfekt! Alle anderen erfordern Nachforschungen.
map = get_map('Berlin')
Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Berlin&zoom=10&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Berlin&sensor=false
#re_schulstand[re_schulstand$spatial_name %in% setdiff(grundschulen, schulen_mit_anmeldungen),] %>% View()
re_schulstand_df = re_schulstand_df %>% left_join(kapas, by=c('spatial_name'='Schulnummer'))
Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
character vector and factor, coercing into character vector
data = re_schulstand_df %>% filter(grepl('G', spatial_name)) %>% filter(BEZIRK==bezirk) %>% mutate(has.capa=is.na(`Plätze`))
ggmap(map) + geom_point(aes(lon, lat, color=has.capa), data=data) +
coord_map(xlim=c(min(data$lon)-0.01, max(data$lon)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))
Ignorieren das Problem und tun so, als ob nur diese Schulen relevant sind:
relevant_schools = re_schulstand_df %>% filter(grepl('G', spatial_name)) %>% filter(BEZIRK==bezirk & !is.na(`Plätze`)) %>% .$spatial_name
relevant_schools
[1] "07G01" "07G02" "07G03" "07G05" "07G06" "07G07" "07G10" "07G12"
[9] "07G13" "07G14" "07G15" "07G16" "07G17" "07G18" "07G19" "07G20"
[17] "07G21" "07G22" "07G23" "07G24" "07G25" "07G26" "07G27" "07G28"
[25] "07G29" "07G30" "07G31" "07G32" "07G34" "07G35" "07G36" "07G37"
07G01
07G02
07G03
07G05
07G06
07G07
07G10
07G12
07G13
07G14
07G15
07G16
07G17
07G18
07G19
07G20
07G21
07G22
07G23
07G24
07G25
07G26
07G27
07G28
07G29
07G30
07G31
07G32
07G34
07G35
07G36
07G37
einwohler_lor finden wir die durchschnittliche Kinderzahl pro _Wohngebäudedf_bez = as.data.frame(bez)
df_lor = as.data.frame(lor)
df_blk = as.data.frame(blk)
bez_id = filter(df_bez, BEZNAME == bezirk)$BEZ
relevant_lors = df_lor %>% filter(BEZ == bez_id)
relevant_blks = df_blk %>% filter(BEZ == bez_id)
ggplot() + geom_path(aes(x=long, y=lat, group=group), data=lor[lor$BEZ == bez_id,]) + coord_map() + geom_path(aes(x=long, y=lat, group=group), data=bez, color='red')
Regions defined for each Polygons
Regions defined for each Polygons
ggplot() + geom_path(aes(x=long, y=lat, group=group), data=blk[blk$BEZ == bez_id,]) + coord_map() + geom_path(aes(x=long, y=lat, group=group), data=bez[bez$BEZ == bez_id,], color='red')
Regions defined for each Polygons
Regions defined for each Polygons
Schon erledigt in sampled_buildings.
TODO neue Daten von Torres
relevant_ewr = einwohner_lor %>% select(RAUMID, E_E06_07) %>% filter(RAUMID %in% relevant_lors$PLR) %>%
mutate(kids=as.numeric(gsub(',','.',E_E06_07))) %>% as.data.frame()
data = tidy(lor[lor$BEZ == bez_id,], region='PLR') %>% inner_join(relevant_ewr, by=c('id'='RAUMID'))
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group, fill=kids), data=data) +
coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))
For each LOR distribute the children according to the number of people living in that block
kids_in_blks = relevant_blks %>% group_by(PLR) %>% mutate(EinwRatio = Einw/sum(Einw)) %>% ungroup %>% left_join(relevant_ewr, by=c('PLR'='RAUMID')) %>% mutate(kids = EinwRatio*kids) %>% select(BEZ, PLR, BLK, Einw, kids) %>% as.data.frame()
Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
character vector and factor, coercing into character vector
row.names(kids_in_blks) = kids_in_blks$BLK
data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% inner_join(kids_in_blks, by=c('id'='BLK')) %>% mutate(kids=ifelse(kids==0, NA, kids), Einw=ifelse(Einw==0, NA, Einw))
Warning in inner_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
character vector and factor, coercing into character vector
0
[1] 0
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group, fill=kids), data=data) +
coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))
relevant_kapas = kapas %>% select(Schulnummer, Kapa=`Plätze`) %>% filter(Schulnummer %in% relevant_schools) %>% as.data.frame()
#row.names(relevant_kapas) = relevant_kapas$Schulnummer
'Summe Kapas'
[1] "Summe Kapas"
Summe Kapas
relevant_kapas %>% .$Kapa %>% sum
[1] 2584
'Summe Kapas + extra'
[1] "Summe Kapas + extra"
Summe Kapas + extra
relevant_kapas %>% .$Kapa_extra %>% sum
[1] 0
'Anmeldungen'
[1] "Anmeldungen"
Anmeldungen
kapas %>% mutate(Anmeldungen = as.numeric(gsub('[^0-9]', '', Anmeldungen))) %>% filter(Schulnummer %in% relevant_schools) %>% .$Anmeldungen %>% sum
[1] 2752
'Kids laut Statistik'
[1] "Kids laut Statistik"
Kids laut Statistik
kids_in_blks$kids %>% sum
[1] 2855
relevant_ewr$kids %>% sum
[1] 2855
find block of each residential building
residential_buildings_blocks = sampled_buildings %>% inner_join(df_blk) %>% filter(BEZ == bez_id)
Joining, by = "BLK"
Warning in inner_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
character vector and factor, coercing into character vector
residential_buildings_blocks
routes_from_blks = residential_buildings_blocks %>%
left_join(routen_matrix %>% filter(dst %in% relevant_schools), by=c('OI'='src'))
head(routes_from_blks)
data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% inner_join(routes_from_blks %>% group_by(BLK) %>% summarise(n=n()), by=c('id'='BLK'))
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group), fill='red', data=data) +
#geom_point(aes(x=lon, y=lat), data=rb_df, color='black', size=0.01) +
coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))
travel_from_blks = routes_from_blks %>% as.data.frame() %>% group_by(BLK, dst) %>% summarise(min=min(distance), avg=mean(distance), med=median(distance), max=max(distance)) %>% ungroup
travel_from_blks
travel_from_blks %>% write_csv('output/travel_from_blks.csv')
Color Blocks by avg distance
data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% left_join(travel_from_blks %>% group_by(BLK) %>% top_n(1, -avg), by=c('id'='BLK'))
ggmap(map) + geom_polygon(aes(x=long, y=lat, group=group, fill=-avg), data=data) +
geom_point(aes(lon, lat), color='red', data = re_schulstand_df %>% filter(BEZIRK==bezirk & SCHULART=='Grundschule')) +
coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01))
travel_matrix = travel_from_blks %>% select(BLK, dst, avg) %>% spread(dst, avg)
dim(travel_matrix)
[1] 1012 33
travel_matrix
optim_kapas = relevant_kapas
optim_kids_in_blks = kids_in_blks %>% filter(kids > 0) %>% inner_join(travel_matrix, by='BLK') %>% select(BLK, kids) %>% mutate(kids=kids*0.88)
Warning in inner_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
factor and character vector, coercing into character vector
nrow(optim_kids_in_blks)
[1] 1012
nrow(optim_kapas)
[1] 32
select_schools = as.character(optim_kapas$Schulnummer)
select_blks = as.character(optim_kids_in_blks$BLK)
optim_matrix = inner_join(optim_kids_in_blks, travel_matrix, by='BLK')[select_schools]
dim(optim_matrix)
[1] 1012 32
optim_kapas$Kapa %>% sum
[1] 2584
optim_kids_in_blks$kids %>% sum
[1] 2503.5
optim_matrix %>% write_csv('output/optim_matrix.csv')
optim_kapas %>% write_csv('output/optim_kapas.csv')
optim_kids_in_blks %>% write_csv('output/optim_blks.csv')
solution = optim_matrix %>% mutate(BLK=optim_kids_in_blks$BLK) %>% gather(school, dist, -BLK) %>% group_by(BLK) %>% top_n(1, -dist) %>% ungroup
optim_matrix %>% t %>% as.data.frame %>% summarise_each(funs(min)) %>% sum()
[1] 47204364
solines = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school')) %>% inner_join(cbind(as.data.frame(coordinates(blk[blk$BEZ == bez_id,])), blk[blk$BEZ == bez_id,]@data['BLK']))
Joining, by = "BLK"
Warning in inner_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
character vector and factor, coercing into character vector
data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% left_join(solution, by=c('id'='BLK'))
ggmap(map, darken = c(0.5, 'white')) + geom_polygon(aes(x=long, y=lat, group=group, fill=school), data=data) +
geom_segment(aes(x=V1,y=V2,xend=lon,yend=lat), data=solines, size=0.3) +
geom_point(aes(lon, lat), color='black', size=2, data = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school'))) +
geom_point(aes(lon, lat, color=spatial_name), data = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school'))) +
coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01)) +
guides(color=F, fill=F)
solution %>% inner_join(travel_from_blks, by=c('BLK'='BLK', 'school'='dst')) %>% summarise_each("max", -BLK, -school, -dist)
julia jump.jl
solution = read_csv('output/solution_jump.csv') %>% setNames(., select_schools) %>%
mutate(BLK=select_blks) %>% gather(school, assigned, -BLK) %>% filter(assigned > 0)
Parsed with column specification:
cols(
.default = col_double()
)
See spec(...) for full column specifications.
solines = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school')) %>% inner_join(cbind(as.data.frame(coordinates(blk[blk$BEZ == bez_id,])), blk[blk$BEZ == bez_id,]@data['BLK']))
Joining, by = "BLK"
Warning in inner_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
character vector and factor, coercing into character vector
data = tidy(blk[blk$BEZ == bez_id,], region='BLK') %>% left_join(solution, by=c('id'='BLK'))
ggmap(map, darken = c(0.5, 'white')) + geom_polygon(aes(x=long, y=lat, group=group, fill=school), data=data) +
geom_segment(aes(x=V1,y=V2,xend=lon,yend=lat), data=solines, size=0.3) +
geom_point(aes(lon, lat), color='black', size=2, data = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school'))) +
geom_point(aes(lon, lat, color=spatial_name), data = re_schulstand_df %>% inner_join(solution, by=c('spatial_name'='school'))) +
coord_map(xlim=c(min(data$long)-0.01, max(data$long)+0.01), ylim=c(min(data$lat)-0.01, max(data$lat)+0.01)) +
guides(color=F, fill=F)
solution %>% inner_join(travel_from_blks, by=c('BLK'='BLK', 'school'='dst')) %>% summarise_all(max)
sum(read_csv('output/solution_jump.csv') * optim_matrix)
Parsed with column specification:
cols(
.default = col_double()
)
See spec(...) for full column specifications.
[1] 50098818
sum(read_csv('output/solution_jump.csv') * optim_matrix * optim_kids_in_blks$kids)
Parsed with column specification:
cols(
.default = col_double()
)
See spec(...) for full column specifications.
[1] 134794056
library(formattable)
solution %>% inner_join(optim_kids_in_blks, by='BLK') %>% inner_join(travel_from_blks, by=c('BLK'='BLK', 'school'='dst')) %>%
group_by(school) %>% summarise(
kids=sum(kids),
num_blocks=n(),
min_dist=min(min),
avg_dist=mean((kids*avg)/sum(kids)),
max_dist=max(max)
) %>%
inner_join(relevant_kapas, by=c('school'='Schulnummer')) %>%
mutate(
utilization=kids/Kapa
) %>% select(
Schule=school,
`Blöcke`=num_blocks,
Kapazität=Kapa,
Kinder=kids,
Auslastung=utilization,
`Weg (min)`=min_dist,
`Weg (Ø)`=avg_dist,
`Weg (max)`=max_dist
) %>%
formattable(
list(
Kinder = formatter("span", x ~ digits(x, 2)),
Auslastung = formatter("span",
style = x ~ style(color = ifelse(x < 1, "green", "red")),
x ~ icontext(ifelse(x < 1, "ok", "remove"), percent(x))
),
`Weg (Ø)` = proportion_bar("lightblue"),
`Weg (min)` = proportion_bar("lightblue"),
`Weg (max)` = proportion_bar("lightblue")
)
)
| Schule | Blöcke | Kapazität | Kinder | Auslastung | Weg (min) | Weg (Ø) | Weg (max) |
|---|---|---|---|---|---|---|---|
| 07G01 | 13 | 66 | 65.87 | 99.80% | 10394.854 | 41208.35 | 67553.24 |
| 07G02 | 25 | 98 | 68.45 | 69.84% | 9854.407 | 43308.79 | 95793.37 |
| 07G03 | 18 | 73 | 72.98 | 99.97% | 5612.879 | 34905.84 | 67585.35 |
| 07G05 | 17 | 78 | 77.32 | 99.13% | 2974.415 | 34113.15 | 55922.32 |
| 07G06 | 23 | 55 | 54.90 | 99.83% | 14486.592 | 47310.14 | 84725.57 |
| 07G07 | 26 | 81 | 79.26 | 97.85% | 6858.002 | 58310.50 | 126217.21 |
| 07G10 | 27 | 112 | 111.78 | 99.81% | 11013.279 | 37674.58 | 87048.43 |
| 07G12 | 21 | 75 | 73.61 | 98.15% | 7133.251 | 32159.58 | 126987.39 |
| 07G13 | 21 | 82 | 81.59 | 99.50% | 11938.918 | 32602.53 | 59753.91 |
| 07G14 | 32 | 78 | 77.91 | 99.88% | 4114.720 | 28625.73 | 65652.98 |
| 07G15 | 32 | 104 | 103.74 | 99.75% | 10521.086 | 49633.38 | 90947.69 |
| 07G16 | 33 | 104 | 97.74 | 93.98% | 11396.049 | 51016.56 | 165876.56 |
| 07G17 | 29 | 100 | 99.71 | 99.71% | 1236.955 | 33875.89 | 111973.53 |
| 07G18 | 14 | 44 | 43.70 | 99.31% | 4134.006 | 23755.78 | 46592.26 |
| 07G19 | 34 | 112 | 111.42 | 99.48% | 4551.512 | 56389.17 | 130002.55 |
| 07G20 | 27 | 81 | 80.72 | 99.65% | 1193.574 | 35844.04 | 86727.51 |
| 07G21 | 22 | 72 | 71.01 | 98.62% | 13109.427 | 31728.63 | 76416.08 |
| 07G22 | 30 | 91 | 87.74 | 96.41% | 8272.397 | 35984.90 | 81386.13 |
| 07G23 | 16 | 50 | 48.81 | 97.62% | 6689.752 | 81418.59 | 152893.48 |
| 07G24 | 28 | 75 | 74.51 | 99.35% | 7090.940 | 40827.13 | 118576.96 |
| 07G25 | 26 | 75 | 74.73 | 99.64% | 5507.963 | 31427.67 | 69206.39 |
| 07G26 | 23 | 52 | 43.19 | 83.06% | 5428.220 | 33540.96 | 101684.31 |
| 07G27 | 26 | 75 | 74.25 | 99.00% | 2532.286 | 45702.87 | 98958.30 |
| 07G28 | 59 | 75 | 74.76 | 99.68% | 9342.737 | 55027.04 | 121515.06 |
| 07G29 | 91 | 104 | 103.74 | 99.75% | 1138.862 | 56844.63 | 123198.30 |
| 07G30 | 45 | 72 | 71.42 | 99.19% | 482.926 | 54391.29 | 146048.88 |
| 07G31 | 24 | 78 | 68.22 | 87.46% | 5582.570 | 59090.95 | 143043.15 |
| 07G32 | 59 | 78 | 77.88 | 99.84% | 4557.764 | 49076.43 | 263035.55 |
| 07G34 | 38 | 100 | 95.34 | 95.34% | 21120.242 | 72165.91 | 133903.62 |
| 07G35 | 38 | 75 | 71.75 | 95.67% | 5838.956 | 47407.20 | 88698.65 |
| 07G36 | 43 | 100 | 96.48 | 96.48% | 9295.770 | 69304.17 | 288785.86 |
| 07G37 | 52 | 69 | 68.99 | 99.99% | 1399.929 | 80731.00 | 133766.21 |
write_rds(solution, 'app/data/init_solution.rds')
write_rds(subset(blk, BEZ == bez_id), 'app/data/blocks.rds')
write_rds(subset(re_schulstand_df, spatial_name %in% relevant_schools), 'app/data/schools.rds')
write_rds(subset(bez, BEZ == bez_id), 'app/data/bez.rds')
block_stats = optim_kids_in_blks %>% inner_join(travel_from_blks, by=c('BLK'='BLK')) %>% inner_join(relevant_kapas, by=c('dst'='Schulnummer')) %>% inner_join(re_schulstand_df, by=c('dst'='spatial_name'))
write_rds(block_stats, 'app/data/block_stats.rds')